*! version 1.0.5  22june2017  Michael Stepner, stepner@mit.edu

/*** Unlicence (abridged):
This is free and unencumbered software released into the public domain.
It is provided "AS IS", without warranty of any kind.

For the full legal text of the Unlicense, see <http://unlicense.org>
*/

* Why did I include a formal license? Jeff Atwood gives good reasons:
*  https://blog.codinghorror.com/pick-a-license-any-license/


program define maptile, rclass
	version 11
	
	set more off

	syntax varname(numeric) [if] [in], GEOgraphy(string) [ twopt(string asis) ///
		Nquantiles(integer 6) CUTPoints(varname numeric) CUTValues(numlist ascending) ///
		FColor(string asis) RANGEColor(string asis) REVcolor PROPcolor SHRINKColorscale(real 1) NDFcolor(string) ///
		LEGDecimals(string) LEGFormat(string) ///
		SAVEgraph(string) replace RESolution(real 1) ///
		mapif(string) spopt(string asis) geofolder(string) hasdatabase ///
		*]
	
	preserve
	
	* Set default geofolder
	if (`"`geofolder'"'=="") local geofolder `c(sysdir_personal)'maptile_geographies

	* Load the code for the specified geography
	cap confirm file `"`geofolder'/`geography'_maptile.ado"'
	if (_rc!=0) {
		di as error "geography(`geography') specified, but it is not installed."
		
		if ("`geofolder'"=="`c(sysdir_personal)'maptile_geographies") di as text `"To see a list of installed geographies run: {stata maptile_geolist}"'
		else {
			di as text `"To see a list of installed geographies run:"'
			di as text `"   {stata maptile_geolist, geofolder(`geofolder')}"'
			di as text ""
		}
		exit 198
	}
	
	cap program drop _maptile_`geography'
	run `"`geofolder'/`geography'_maptile.ado"'
	cap program list _maptile_`geography'
	if (_rc!=0) {
		di as error `""`geography'_maptile.ado" was loaded from the geofolder, but it does not define a program named _maptile_`geography'"'
		exit 198
	}
	
	* Check whether using an outdated geography template (one from before v0.80beta)
	cap _maptile_`geography', clopt(test)
	if (_rc==198) {
		di as error `"The geography template {bf:`geography'} is outdated; it will not work with the latest versions of maptile."'
		di as text `"You must update the template. If it was obtained from the {browse "http://michaelstepner.com/maptile/geographies":maptile website}, an updated version is available from there."'
		exit
	}
	
	
	* Set defaults & perform checks
	local var `varlist'
	local geooptions `options'
	
	if ("`replace'"=="") & (`"`savegraph'"'!="") {
		if regexm(`"`savegraph'"',"\.[a-zA-Z0-9]+$") confirm new file `"`savegraph'"'
		else confirm new file `"`savegraph'.gph"'
	}

	if ("`legdecimals'"!="") {
		if real("`legdecimals'")<0 | missing(real("`legdecimals'")) | int(real("`legdecimals'"))!=real("`legdecimals'") {
			di as error "legdecimals() must be an integer >=0"
			exit 198
		}
		if ("`legformat'"!="") {
			di as error "Cannot specify both legdecimals() and legformat()"
			exit 198
		}
		local legformat %12.`legdecimals'fc
	}
	
	if (`nquantiles'!=6)+("`cutpoints'"!="")+("`cutvalues'"!="")>1 {
		di as error "can only specify one of nquantiles(), cutpoints(), cutvalues()"
		exit 198
	}
	
	if (`resolution'<=0) {
		di as error "resolution() must be a number greater than 0"
		exit 198
	}
	
	if (`"`fcolor'"'!="") {
		if ("`revcolor'"!="") {
			di as error "cannot specify revcolor with fcolor()"
			exit 198
		}
		if ("`propcolor'"!="") {
			di as error "cannot specify propcolor with fcolor()"
			exit 198
		}
		if (`shrinkcolorscale'!=1) {
			di as error "cannot specify shrinkcolorscale() with fcolor()"
			exit 198
		}
		if (`"`rangecolor'"'!="") {
			di as error "cannot specify rangecolor() with fcolor()"
			exit 198
		}	
	}
	
	if (`"`ndfcolor'"'=="") local ndfcolor gs12
	
	if (`shrinkcolorscale'>1) | (`shrinkcolorscale'<=0) {
		di as error "shrinkcolorscale() must be greater than 0 and less than or equal to 1"
		exit 198
	}
	
	if (`"`mapif'"'!="") local map_restriction if (`mapif')
	
	
	* Specify color gradient boundaries
	if `"`rangecolor'"'=="" {
	
		* default: yellow*0.1 -> red*1.65
		local low_r=255
		local low_g=255
		local low_b=0
		
		local high_r=255
		local high_g=0
		local high_b=0
		
		local low_intensity=.1
		local high_intensity=1.65
		
	}
	else if `:word count `rangecolor''!=2 {
		di as error `"rangecolor() must contain exactly two colorstyles, e.g. <yellow red> or <"255 255 0" "255 255 0">"'
		exit 198
	}
	else {
		local low_str : word 1 of `rangecolor'
		local high_str : word 2 of `rangecolor'
		
		foreach i in low high {
			local starpos = strpos("``i'_str'","*")
			if `starpos'>0 {
				local `i'_color=substr("``i'_str'",1,`starpos'-1)
				local `i'_intensity=substr("``i'_str'",`starpos'+1,.)
			}
			else {
				local `i'_color ``i'_str'
				local `i'_intensity=1
			}
			
			* Check intensity is valid
			if !inrange(real("``i'_intensity'"),0,255) {
				di as error `"'``i'_intensity'' is not a valid color intensity. Must be a number between 0 and 255."'
				exit 198
			}
			
			* Convert colorstyle to RGB
			gr_setscheme , refscheme
			color_load ``i'_color'
			local `i'_r : word 1 of `s(rgb)'
			local `i'_g : word 2 of `s(rgb)'
			local `i'_b : word 3 of `s(rgb)'
		}
	}


	* Restrict sample
	if `"`if'`in'"'!="" {
		marksample touse
		qui replace `var'=. if !`touse'
	}


	* Set nquantiles, break points, indicators for whether bin exists
	tempname clbreaks binexists
	
	if ("`cutpoints'"!="") {
	
		* Find quantile boundaries from cutpoints var
		mata: st_matrix("`clbreaks'",excludemissing(sort(st_data(.,st_varindex("`cutpoints'")),1)))
		matrix colnames `clbreaks' = cutpoints

		* Update nquantiles
		local nquantiles=rowsof(`clbreaks')+1
		if `nquantiles'==1 {
			di as error "cutpoints() all missing"
			exit 2000
		}
		
		* Skip bins only due to duplicate quantiles
		matrix `binexists'=J(`nquantiles',1,1)
		forvalues i=2/`nquantiles-1' {
			if (`clbreaks'[`i',1]==`clbreaks'[`i'-1,1]) matrix `binexists'[`i',1]=0
		}
		
	}
	else if ("`cutvalues'"!="") {
	
		* parse numlist
		numlist "`cutvalues'"
		
		* update nquantiles
		local nquantiles : word count `r(numlist)'
		local ++nquantiles
		
		* create matrix of break points
		matrix `clbreaks'=J(`=`nquantiles'-1',1,.)
		forvalues i=1/`=`nquantiles'-1' {
			matrix `clbreaks'[`i',1]=`: word `i' of `r(numlist)''
		}
		
		matrix colnames `clbreaks' = cutvalues
		
		* don't skip any bins, even if they are empty
		matrix `binexists'=J(`nquantiles',1,1)
		
	}
	else { /* NQUANTILES */
		
		* Prepare empty matrix of break points & empty matrix of indicators for whether a bin is non-empty
		matrix `clbreaks'=J(`=`nquantiles'-1',1,.)
		matrix `binexists'=J(`nquantiles',1,0)

		* Create quantile category var
		tempvar qcatvar
		fastxtile `qcatvar'=`var', nq(`nquantiles')
		
		* Store quantile boundaries in list
		forvalues i=1/`=`nquantiles'-1' {
			matrix `clbreaks'[`i',1]=r(r`i')
		}
		
		matrix colnames `clbreaks' = `var'
		
		* Fill indicators for non-empty bins
		tempname binnums
		qui tab `qcatvar' `map_restriction', matrow(`binnums')
		
		forvalues i=1/`r(r)' {
			matrix `binexists'[`binnums'[`i',1],1]=1
		}

	}
	
	* Merge in database (polygon id variable)
	if (`"`if'`in'"'!="") qui keep if `touse' // in order to merge 1:m
	if ("`hasdatabase'"=="") qui _maptile_`geography', mergedatabase geofolder(`geofolder') `geooptions'


	* Calculate min/max
	tempname min max
	qui sum `var', meanonly
	scalar `min'=min(r(min),`clbreaks'[1,1])
	scalar `max'=max(r(max),`clbreaks'[`nquantiles'-1,1])
	
	* Choose legend format
	if ("`legformat'"=="") {
	
		* Define locals that point to first and last breakpoint
		local rsmall min(abs(`min'),abs(`max'))
		local rbig max(abs(`min'),abs(`max'))
		
		* Check if all breakpoints are integers
		local rinteger=1
		if (`min'!=int(`min')) local rinteger=0
		if (`max'!=int(`max')) local rinteger=0
		forvalues i=1/`=`nquantiles'-1' {
			if (`clbreaks'[`i',1]!=int(`clbreaks'[`i',1])) local rinteger=0
		}
		
		* Choose a nice format for decimals
		if (`rbig'>=10^7) local legformat %12.1e
		else if (`rinteger'==1) local legformat %12.0fc
		else if (`rbig'>=1000) local legformat %12.0fc
		else if (`rbig'>=100) local legformat %12.1fc
		else if (`rbig'>=1) local legformat %12.2fc
		else if (`rsmall'>=0.01) local legformat %12.3fc
		else if (`rsmall'>=0.001) & (`max'-`min'>=0.001*`nquantiles'*2) local legformat %12.3fc
		else if (`rsmall'>=0.0001) & (`max'-`min'>=0.0001*`nquantiles'*2) local legformat %12.4fc
		else local legformat %12.1e
	}
	format `var' `legformat'

		
	* Place each bin appropriately on the color gradient, if colors not manually specified
	if (`"`fcolor'"'=="") {
		local mapcolors ""
			
		* If doing proportional color scaling, calculate median value within each quantile
		if ("`propcolor'"!="") {
			tempname quantile_vals
			matrix `quantile_vals'=J(`nquantiles',1,.)
		
			forvalues i=1/`nquantiles' {
			
				if "`cutpoints'`cutvalues'"!="" {
					if (`i'==1) 					qui _pctile `var' if `var'<=`clbreaks'[1,1], percentiles(50)
					else if (`i'==`nquantiles')		qui _pctile `var' if `var'>`clbreaks'[`=`nquantiles'-1',1], percentiles(50)
					else 							qui _pctile `var' if `var'>`clbreaks'[`i'-1,1] & `var'<=`clbreaks'[`i',1], percentiles(50)
				}
				else qui _pctile `var' if `qcatvar'==`i', percentiles(50)
				
				if !mi(r(r1)) matrix `quantile_vals'[`i',1]=r(r1)
				else { /* no data, so pick the midpoint of the interval */
					if (`i'==1) 					matrix `quantile_vals'[`i',1]= `clbreaks'[1,1]
					else if (`i'==`nquantiles')		matrix `quantile_vals'[`i',1]= `clbreaks'[`=`nquantiles'-1',1]
					else 							matrix `quantile_vals'[`i',1]= (`clbreaks'[`i'-1,1]+`clbreaks'[`i',1])/2
				}
				
			}

			tempname QV_min QV_length
			scalar `QV_min'=`quantile_vals'[1,1]
			scalar `QV_length'=`quantile_vals'[`nquantiles',1]-`QV_min'
		}
		
		* Reverse color order if needed
		if ("`revcolor'"!="") local flipweights="1 -"
		
		* Compute RGB color values
		forvalues i=1/`nquantiles' {
		
			* Skip this bin if it is empty
			if (`binexists'[`i',1]==0) continue
		
			* Set the spacings between each color
			if ("`propcolor'"!="") local weight_high=( `flipweights' (`quantile_vals'[`i',1]-`QV_min')/`QV_length' ) * `shrinkcolorscale' + (1-`shrinkcolorscale')/2
			else local weight_high=( `flipweights' (`i'-1)/(`nquantiles'-1) ) * `shrinkcolorscale' + (1-`shrinkcolorscale')/2
			
			* Stretch the color spectrum as desired. In default colour space, this is expanding the yellows, shrinking the reds.
			local cos_weight_high=1 - cos( `weight_high' * c(pi) / 2 )
			local mixed_weight_high=(3*`weight_high'+`cos_weight_high')/4
			
			* Compute color components
			foreach component in r g b {
				local cur_`component'=round(`low_`component''*(1-`cos_weight_high')+`high_`component''*`cos_weight_high')
			}
			local cur_intensity=`low_intensity'*(1-`mixed_weight_high')+`high_intensity'*`mixed_weight_high'
		
			* Store this color in the list
			local mapcolors `"`mapcolors' "`cur_r' `cur_g' `cur_b'*`cur_intensity'""'
			
		}
		
	}
	else local mapcolors `fcolor'
	
	
	* Convert clbreaks matrix to string
	local clbreaks_str ""
	forvalues i=1/`=`nquantiles'-1' {
	
		* Skip break if it's a duplicate
		if (`binexists'[`i',1]==0) continue
		
		local clbreaks_str `clbreaks_str' `=`clbreaks'[`i',1]'
	}
	
	* Determine legend style specified in spopt()
	local 0 ,`spopt'
	syntax , [legstyle(numlist max=1 >=0 <=3) legjunction(string) *]
	
	* Prepare maptile specification
	if "`cutpoints'`cutvalues'"!="" {
	
		* Avoid min or max creating duplicate clbreak
		if (`min'==`clbreaks'[1,1]) scalar `min'=`min'-epsfloat()
		if (`max'==`clbreaks'[`nquantiles'-1,1]) scalar `max'=`max'+epsfloat()

		* Prepare clmethod
		local clopt clmethod(custom) clbreaks(`=`min'' `clbreaks_str' `=`max'')
		local spmapvar `var'
		
		* Prepare legend
		if "`legstyle'"=="" local legopt `legopt' legstyle(2)
		if "`legjunction'"=="" local legopt `legopt' legjunction(" {&minus} ")	
			
	}
	else {
	
		* Prepare clmethod
		local clopt clmethod(unique)
		local spmapvar `qcatvar'
		
		* Prepare legend
		if "`legstyle'"=="" local legstyle 2
		if "`legjunction'"=="" local legjunction " {&minus} "
		
		tempname leglabel
		forvalues i=1/`nquantiles' {
			
			if (`legstyle'==0) label define `leglabel' `i' "", add
			else if (`legstyle'==1) {
				if (`i'==1) label define `leglabel' `i' "[`:display string(`min',"`legformat'")',`:display string(`clbreaks'[`i',1],"`legformat'")']", add
				else if (`i'==`nquantiles') label define `leglabel' `i' "(`:display string(`clbreaks'[`i'-1,1],"`legformat'")',`:display string(`max',"`legformat'")']", add
				else label define `leglabel' `i' "(`:display string(`clbreaks'[`i'-1,1],"`legformat'")',`:display string(`clbreaks'[`i',1],"`legformat'")']", add
			}
			else if (`legstyle'==2) {
				if (`i'==1) label define `leglabel' `i' "`:display string(`min',"`legformat'")'`legjunction'`:display string(`clbreaks'[`i',1],"`legformat'")'", add
				else if (`i'==`nquantiles') label define `leglabel' `i' "`:display string(`clbreaks'[`i'-1,1],"`legformat'")'`legjunction'`:display string(`max',"`legformat'")'", add
				else label define `leglabel' `i' "`:display string(`clbreaks'[`i'-1,1],"`legformat'")'`legjunction'`:display string(`clbreaks'[`i',1],"`legformat'")'", add
			}
			else if (`legstyle'==3) {
				if (`i'==1) label define `leglabel' `i' "`:display string(`min',"`legformat'")'", add
				else if (`i'==`nquantiles') label define `leglabel' `i' "`:display string(`max',"`legformat'")'", add
				else label define `leglabel' `i' "", add
			}
			
		}
		
		label values `qcatvar' `leglabel'
		local legopt legorder(hilo) legend(rowgap(0))
	}
	
	* Make map
	_maptile_`geography', map geofolder(`geofolder') ///
		var(`var') ///
		binvar(`qcatvar') ///
		spmapvar(`spmapvar') ///
		min(`=`min'') clbreaks(`clbreaks_str') max(`=`max'') ///
		clopt(`clopt') ///
		legopt(`"`legopt'"') ///
		mapcolors(`mapcolors') ndfcolor(`ndfcolor') ///
		savegraph(`savegraph') `replace' resolution(`resolution') ///
		map_restriction(`"`map_restriction'"') ///
		spopt(`"`spopt' `twopt'"') ///
		`geooptions'
	
	
	* Return objects
	
	cap confirm matrix `quantile_vals'
	if (_rc==0) return matrix midpoints= `quantile_vals'
	
	return matrix breaks=`clbreaks'
		
end


*** Helper programs

* color_load, borrowed from palette.ado version 1.0.11  26jan2012
program color_load , sclass
	tempname mycolor
	.`mycolor' = .color.new , style(`0')
	sret local rgb "`.`mycolor'.setting'"
	sret local color `""`0'""'
end

* Mata function excludemissing()


version 11
set matastrict on

mata:

numeric matrix excludemissing(numeric matrix A) {
	return(select(A, rowmissing(A):==0))
}

end

* fastxtile version 1.22  24jul2014  Michael Stepner, stepner@mit.edu
program define fastxtile, rclass
	version 11

	* Parse weights, if any
	_parsewt "aweight fweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */

	* Extract parameters
	syntax newvarname=/exp [if] [in] [,Nquantiles(integer 2) Cutpoints(varname numeric) ALTdef ///
		CUTValues(numlist ascending) randvar(varname numeric) randcut(real 1) randn(integer -1)]

	* Mark observations which will be placed in quantiles
	marksample touse, novarlist
	markout `touse' `exp'
	qui count if `touse'
	local popsize=r(N)

	if "`cutpoints'"=="" & "`cutvalues'"=="" { /***** NQUANTILES *****/
		if `"`wt'"'!="" & "`altdef'"!="" {
			di as error "altdef option cannot be used with weights"
			exit 198
		}
		
		if `randn'!=-1 {
			if `randcut'!=1 {
				di as error "cannot specify both randcut() and randn()"
				exit 198
			}
			else if `randn'<1 {
				di as error "randn() must be a positive integer"
				exit 198
			}
			else if `randn'>`popsize' {
				di as text "randn() is larger than the population. using the full population."
				local randvar=""
			}
			else {
				local randcut=`randn'/`popsize'
				
				if "`randvar'"!="" {
					qui sum `randvar', meanonly
					if r(min)<0 | r(max)>1 {
						di as error "with randn(), the randvar specified must be in [0,1] and ought to be uniformly distributed"
						exit 198
					}
				}
			}
		}

		* Check if need to gen a temporary uniform random var
		if "`randvar'"=="" {
			if (`randcut'<1 & `randcut'>0) { 
				tempvar randvar
				gen `randvar'=runiform()
			}
			* randcut sanity check
			else if `randcut'!=1 {
				di as error "if randcut() is specified without randvar(), a uniform r.v. will be generated and randcut() must be in (0,1)"
				exit 198
			}
		}

		* Mark observations used to calculate quantile boundaries
		if ("`randvar'"!="") {
			tempvar randsample
			mark `randsample' `wt' if `touse' & `randvar'<=`randcut'
		}
		else {
			local randsample `touse'
		}

		* Error checks
		qui count if `randsample'
		local samplesize=r(N)
		if (`nquantiles' > r(N) + 1) {
			if ("`randvar'"=="") di as error "nquantiles() must be less than or equal to the number of observations [`r(N)'] plus one"
			else di as error "nquantiles() must be less than or equal to the number of sampled observations [`r(N)'] plus one"
			exit 198
		}
		else if (`nquantiles' < 2) {
			di as error "nquantiles() must be greater than or equal to 2"
			exit 198
		}

		* Compute quantile boundaries
		_pctile `exp' if `randsample' `wt', nq(`nquantiles') `altdef'

		* Store quantile boundaries in list
		local maxlist 248
		forvalues i=1/`=`nquantiles'-1' {
			local cutvallist`=ceil(`i'/`maxlist')' `cutvallist`=ceil(`i'/`maxlist')'' r(r`i')
		}
	}
	else if "`cutpoints'"!="" { /***** CUTPOINTS *****/
	
		* Parameter checks
		if "`cutvalues'"!="" {
			di as error "cannot specify both cutpoints() and cutvalues()"
			exit 198
		}		
		if "`wt'"!="" | "`randvar'"!="" | "`ALTdef'"!="" | `randcut'!=1 | `nquantiles'!=2 | `randn'!=-1 {
			di as error "cutpoints() cannot be used with nquantiles(), altdef, randvar(), randcut(), randn() or weights"
			exit 198
		}

		* Find quantile boundaries from cutpoints var
		mata: process_cutp_var("`cutpoints'")

		* Store quantile boundaries in list
		if r(nq)==1 {
			di as error "cutpoints() all missing"
			exit 2000
		}
		else {
			local nquantiles = r(nq)
			
			local maxlist 248
			forvalues i=1/`=`nquantiles'-1' {
				local cutvallist`=ceil(`i'/`maxlist')' `cutvallist`=ceil(`i'/`maxlist')'' r(r`i')
			}
		}
	}
	else { /***** CUTVALUES *****/
		if "`wt'"!="" | "`randvar'"!="" | "`ALTdef'"!="" | `randcut'!=1 | `nquantiles'!=2 | `randn'!=-1 {
			di as error "cutvalues() cannot be used with nquantiles(), altdef, randvar(), randcut(), randn() or weights"
			exit 198
		}
		
		* parse numlist
		numlist "`cutvalues'"
		local maxlist=-1
		local cutvallist1 `"`r(numlist)'"'
		local nquantiles : word count `r(numlist)'
		local ++nquantiles
	}

	* Pick data type for quantile variable
	if (`nquantiles'<=maxbyte()) local qtype byte
	else if (`nquantiles'<=maxint()) local qtype int
	else local qtype long

	* Create quantile variable
	local cutvalcommalist : subinstr local cutvallist1 " " ",", all
	qui gen `qtype' `varlist'=1+irecode(`exp',`cutvalcommalist') if `touse'
	
	forvalues i=2/`=ceil((`nquantiles'-1)/`maxlist')' {
		local cutvalcommalist : subinstr local cutvallist`i' " " ",", all
		qui replace `varlist'=1 + `maxlist'*(`i'-1) + irecode(`exp',`cutvalcommalist') if `varlist'==1 + `maxlist'*(`i'-1)
	}

	label var `varlist' "`nquantiles' quantiles of `exp'"

	* Return values
	if ("`samplesize'"!="") return scalar n = `samplesize'
	else return scalar n = .
	
	return scalar N = `popsize'
	
	local c=`nquantiles'-1
	forvalues j=`=max(ceil((`nquantiles'-1)/`maxlist'),1)'(-1)1 {
		tokenize `"`cutvallist`j''"'
		forvalues i=`: word count `cutvallist`j'''(-1)1 {
			return scalar r`c' = ``i''
			local --c
		}
	}

end


version 11
set matastrict on

mata:

void process_cutp_var(string scalar var) {

	// Load and sort cutpoints	
	real colvector cutp
	cutp=sort(st_data(.,st_varindex(var)),1)
	
	// Return them to Stata
	stata("clear results")
	real scalar ind
	ind=1
	while (cutp[ind]!=.) {
		st_numscalar("r(r"+strofreal(ind)+")",cutp[ind])
		ind=ind+1
	}
	st_numscalar("r(nq)",ind)
	
}

end
